/*SAS code for Mizrach, Swanson, and Yu (2018), "Disentangling the Effects of News, Small Jumps, and Large Jumps on Stock Return Predictability" Macro used: 1. Macro single_sort: replicating results in table 4-7; 2. Macro doublesort_data and doublesort: replicating results in table 8-12; 3. Macro control_sort: replicating results in table 3; 4. Macro sharpe: replicating figure 3; Note: Most of the inuput datasets contain variables "PERMNO", "endofweek", and corresponding weekly realized measures; */ /*Single Portfolio sorts: quintile portfolios*/ libname taq "E:\taq"; %let measure_list =rvol rsj rsk rkt rvc rvjp rvjn srvj rvljp rvljn srvlj rvsjp rvsjn srvsj; %macro single_sort(measure_list=); %local i measure_name; %let i=1; %do %while (%scan(&measure_list, &i) ne ); %let measure_name = %scan(&measure_list, &i); *sorting variable; %let rankvar = weekly_&measure_name; *depdendent variable; %let var = leadret; *time variable and number of groups; %let timevar = endofweek; *%let n=5;*10 groups for example; options notes; proc sql; create table sample as select a.*, b.me, c.&rankvar from taq.leadret as a, taq.me as b, taq.weekly_&measure_name as c where a.permno=b.permno=c.permno and a.endofweek=b.endofweek=c.endofweek; quit; data sample2; set sample; leadret=leadret*10000; keep permno endofweek leadret &rankvar me; run; proc sort; by &timevar; run; /***breakpoints****/ proc univariate data=sample2 noprint; var &rankvar; by endofweek; output out=break pctlpts= 20 40 60 80 pctlpre=dec; run; proc sql; create table port as select a.*, b.dec20, b.dec40,b.dec60,b.dec80 from sample2 as a left join break as b on a.endofweek=b.endofweek; quit; data port2; set port; If &rankvar^=. then do; if &rankvar<=dec20 then group=1; else if &rankvar>dec20 and &rankvar<=dec40 then group=2; else if &rankvar>dec40 and &rankvar<=dec60 then group=3; else if &rankvar>dec60 and &rankvar<=dec80 then group=4; else if &rankvar>dec80 then group=5; else group=.; end; drop dec:; run; proc sort data=port2; by &timevar group; run; data port2; set port2; rename me= weight_port; run; /*********************************/ proc means data=port2 noprint; by &timevar group; var &var; output out=ewmeanret mean=; run; data ewmeanret; set ewmeanret; weight='EW'; run; proc means data=port2 noprint; by &timevar group; var &var; weight weight_port; output out=vwmeanret mean=; run; data vwmeanret; set vwmeanret; weight='VW'; run; data meanret; set ewmeanret vwmeanret; run; data meanret; set meanret; if group=. then delete; run; /************************/ %let data=meanret; %let lag=3; %let byvar=weight; %let rankvar2=group; proc sort data=&data; by &byvar &rankvar2 &timevar; run; proc transpose data=&data out=data; by &byvar &rankvar2 &timevar; var &var; run; *Find H-L difference; proc sort data=&data out=sum; by &byvar &timevar &rankvar2; run; data sum_diff; set sum; by &byvar &timevar &rankvar2; if first.&timevar or last.&timevar; if first.&timevar then &rankvar2=1; if last.&timevar then &rankvar2=2; run; proc transpose data=sum_diff out=sum_diff2; by &byvar &timevar; var &var; id &rankvar2; run; data sum_diff2; set sum_diff2; &rankvar2=99; col1 = _2 - _1; drop _2 _1; run; data sum_diff2; set data sum_diff2; run; proc sort data=sum_diff2; by &byvar _name_ &rankvar2; run; proc sql; create table sum_diff3 as select a.*,(a.col1-b.rf*10000) as exret, b.mktrf*10000 as mkt_rf, b.smb*10000 as smb,b.hml*10000 as hml, b.umd*10000 as umd from sum_diff2 as a left join taq.ffc4 as b on intck('week1.4',a.endofweek, b.endofweek)=1; quit; /************************************************/ proc sort data=sum_diff3; by &byvar _name_ &rankvar2 &timevar; run; data sum_diff3; set sum_diff3; if &rankvar2=99 then exret=col1; run; proc sort data=sum_diff3; by &byvar _name_ &rankvar2 &timevar; run; *** average return; options nonotes; proc model data=sum_diff3; by &byvar _name_ &rankvar2; parms a; exogenous col1 ; instruments / intonly; col1=a; fit col1 / gmm kernel=(bart, %eval(&lag+1), 0);/*lag=6;*/ ods output parameterestimates=param0 fitstatistics=fitresult OutputStatistics=residual; quit; data param0; set param0; type='Average ret'; run; options nonotes; proc model data=sum_diff3; by &byvar &rankvar2 ; parms a b1 b2 b3 b4; instruments mkt_rf smb hml umd; exret =a+b1*mkt_rf+b2*smb+b3*hml+b4*umd; fit exret / gmm kernel=(bart, %eval(&lag+1), 0); ods output parameterestimates=param3 fitstatistics=fitresult OutputStatistics=residual; quit; data param3; set param3; type='FFC4 Alpha'; if parameter='a'; run; data param_&measure_name; set param0 param3; run; data try_&measure_name; set param_&measure_name; if probt<0.1 then p='* '; if probt<0.05 then p='** '; if probt<0.01 then p='***';tvalue2=put(tvalue,7.2); est=put(estimate, 12.2); param=est; if group=99 then PARAM=compress(est||p); T=compress('('||tvalue2||')'); rename _name_=name;run; data try2_&measure_name; set try_&measure_name; keep weight group type param T; run; PROC EXPORT DATA= try2_&measure_name outfile= "E:\single_sort.xlsx " dbms=xlsx replace; sheet="&measure_name"; run; %let i = %eval(&i + 1); %end; %mend single_sort; %single_sort(measure_list=&measure_list); /**Double sorts**/ %let measure_list =rsk rvljp rvljn srvlj rvsjp rvsjn srvsj; %macro doublesort_data(measure_name1=,measure_name2=); %let rankvar1 = weekly_&measure_name1; %let rankvar2 = weekly_&measure_name2; *depdendent variable; %let var = leadret; *time variable and number of groups; %let timevar = endofweek; options notes; proc sql; create table sample as select a.*, b.me, c.&rankvar1,d.&rankvar2 from taq.leadret as a, taq.me as b, taq.weekly_&measure_name1 as c,taq.weekly_&measure_name2 as d where a.permno=b.permno=c.permno=d.permno and a.endofweek=b.endofweek=c.endofweek=d.endofweek; quit; data sample2; set sample; leadret=leadret*10000; keep permno endofweek leadret &rankvar1 &rankvar2 me; run; proc sort; by &timevar; run; /***breakpoints****/ proc univariate data=sample2 noprint; var &rankvar1; by endofweek; output out=&rankvar1._break pctlpts= 20 40 60 80 pctlpre=rank1var; run; proc sql; create table port_var1 as select a.*, b.rank1var20, b.rank1var40,b.rank1var60,b.rank1var80 from sample2 as a, &rankvar1._break as b where a.endofweek=b.endofweek; quit; data port2_var1; set port_var1; If &rankvar1^=. then do; if &rankvar1<=rank1var20 then group1=1; else if &rankvar1>rank1var20 and &rankvar1<=rank1var40 then group1=2; else if &rankvar1>rank1var40 and &rankvar1<=rank1var60 then group1=3; else if &rankvar1>rank1var60 and &rankvar1<=rank1var80 then group1=4; else if &rankvar1>rank1var80 then group1=5; else group1=.; end; drop rank1var:; run; proc sort data=port2_var1; by &timevar group1; run; data port2_var1; set port2_var1; rename me= weight_port; run; proc univariate data=port2_var1 noprint; var &rankvar2; by endofweek group1; output out=&rankvar2._break pctlpts= 20 40 60 80 pctlpre=rank2var; run; proc sql; create table port_var2 as select a.*, b.rank2var20, b.rank2var40,b.rank2var60,b.rank2var80 from port2_var1 as a, &rankvar2._break as b where a.endofweek=b.endofweek and a.group1=b.group1; quit; data port2_var2; set port_var2; If &rankvar2^=. then do; if &rankvar2<=rank2var20 then group2=1; else if &rankvar2>rank2var20 and &rankvar2<=rank2var40 then group2=2; else if &rankvar2>rank2var40 and &rankvar2<=rank2var60 then group2=3; else if &rankvar2>rank2var60 and &rankvar2<=rank2var80 then group2=4; else if &rankvar2>rank2var80 then group2=5; else group2=.; end; drop rank2var:; run; proc sort data=port2_var2; by &timevar group1 group2; run; /************************************************************************************/ proc means data=port2_var2 noprint; by &timevar group1 group2; var &var; output out=ewmeanret mean=; run; data ewmeanret; set ewmeanret; weight='EW'; run; proc means data=port2_var2 noprint; by &timevar group1 group2; var &var; weight weight_port; output out=vwmeanret mean=; run; data vwmeanret; set vwmeanret; weight='VW'; run; /*****/ data meanret; set ewmeanret vwmeanret; run; data meanret; set meanret; if group1=. or group2=. then delete; run; %mend doublesort_data; /************************/ %macro doublesort(out1=); %let data=meanret; %let lag=3; *lag=6; %let byvar=weight; %let rankvar1=group1; %let rankvar2=group2; %let var = leadret; %let timevar = endofweek; proc sort data=&data; by &byvar &rankvar1 &rankvar2 &timevar;run; proc transpose data=&data out=data; by &byvar &rankvar1 &rankvar2 &timevar; var &var;run; proc sort data=data; by _name_ &byvar &rankvar1 &rankvar2 &timevar;run; proc sort data=&data out=sum; by &byvar &rankvar1 &timevar &rankvar2; run; data sum_diff; set sum(where=(&rankvar2>-1)); by &byvar &rankvar1 &timevar &rankvar2; if first.&timevar or last.&timevar; if first.&timevar then &rankvar2=1; if last.&timevar then &rankvar2=2; run; proc transpose data=sum_diff out=sum_diff2; by &byvar &rankvar1 &timevar; var &var; id &rankvar2; run; data sum_diff2; set sum_diff2; &rankvar2=99; col1 = _2 - _1; drop _2 _1; run; proc sql; create table sum_diff2_temp as select a.weight, a.group1, a.group2, a.endofweek, a._name_, a.col1 from sum_diff2 as a; quit; data sum_diff2; set data sum_diff2_temp;run; proc sort data=sum_diff2; by _name_ &byvar &rankvar2 &timevar &rankvar1; run; proc summary data=sum_diff2; by _name_ &byvar &rankvar2 &timevar; var col1; output out=sum_diff3 mean=mean_group1; run; data sum_diff3; set sum_diff3; &rankvar1=100; col1 =mean_group1; run; proc sql; create table sum_diff33 as select a.weight, a.group1, a.group2, a.endofweek, a._name_, a.col1 from sum_diff3 as a; quit; proc sort data=sum_diff33; by _name_ &byvar &rankvar1 &rankvar2 &timevar; run; proc sql; create table sum_diff23 as select a.*,(a.col1-b.rf*10000) as exret, b.mktrf*10000 as mkt_rf, b.smb*10000 as smb,b.hml*10000 as hml, b.umd*10000 as umd from sum_diff2 as a left join taq.ffc4 as b on intck('week1.4',a.endofweek, b.endofweek)=1; quit; data sum_diff23; set sum_diff23; if &rankvar1=100 or &rankvar2=99 then exret=col1; run; proc sort data=sum_diff23; by _name_ &byvar &rankvar1 &rankvar2 &timevar; run; proc sql; create table sum_diff34 as select a.*,(a.col1-b.rf*10000) as exret, b.mktrf*10000 as mkt_rf, b.smb*10000 as smb,b.hml*10000 as hml, b.umd*10000 as umd from sum_diff33 as a left join taq.ffc4 as b on intck('week1.4',a.endofweek, b.endofweek)=1; quit; data sum_diff34; set sum_diff34; if &rankvar1=100 or &rankvar2=99 then exret=col1; run; proc sort data=sum_diff34; by _name_ &byvar &rankvar1 &rankvar2 &timevar; run; ***average return; options nonotes; proc model data=sum_diff23; by &byvar _name_ &rankvar1 &rankvar2; parms a; exogenous col1 ; instruments / intonly; col1=a; fit col1 / gmm kernel=(bart, %eval(&lag+1), 0); ods output parameterestimates=param0 fitstatistics=fitresult OutputStatistics=residual; quit; data param0; set param0; type='Average ret';run; proc model data=sum_diff34; by &byvar _name_ &rankvar1 &rankvar2; parms a; exogenous col1 ; instruments / intonly; col1=a; fit col1 / gmm kernel=(bart, %eval(&lag+1), 0); ods output parameterestimates=param0_ave fitstatistics=fitresult OutputStatistics=residual; quit; data param0_ave; set param0_ave; type='Average ret';run; *Four factor alpha*; options nonotes; proc model data=sum_diff23; by &byvar _name_ &rankvar1 &rankvar2; parms a b1 b2 b3 b4; instruments mkt_rf smb hml umd; exret =a+b1* mkt_rf+b2*smb+b3*hml+b4*umd; fit exret / gmm kernel=(bart, %eval(&lag+1), 0); ods output parameterestimates=param3 fitstatistics=fitresult OutputStatistics=residual; quit; data param3; set param3; type='Carhart4 Alpha'; if parameter='a'; run; proc model data=sum_diff34; by &byvar _name_ &rankvar1 &rankvar2; parms a b1 b2 b3 b4; instruments mkt_rf smb hml umd; exret =a+b1*mkt_rf+b2*smb+b3*hml+b4*umd; fit exret / gmm kernel=(bart, %eval(&lag+1), 0); ods output parameterestimates=param3_ave fitstatistics=fitresult OutputStatistics=residual; quit; data param3_ave; set param3_ave; type='Carhart4 Alpha'; if parameter='a'; run; PROC EXPORT DATA=param0 outfile= "E:\&out._param.xlsx" dbms=xlsx replace; sheet="return"; run; PROC EXPORT DATA=param3 outfile= "E:\&out._param.xlsx" dbms=xlsx replace; sheet="alpha"; run; PROC EXPORT DATA=param0_ave outfile= "E:\&out._param.xlsx" dbms=xlsx replace; sheet="return_ave"; run; PROC EXPORT DATA=param3_ave outfile= "E:\&out._param.xlsx" dbms=xlsx replace; sheet="alpha_ave"; run; %mend doublesort; %doublesort_data(measure_name1=rsk, measure_name2=srvsj); %doublesort(out1=rsk_srvsj); /*Replicating results in table 2*/ /*Summary statistics and correlations*/ /*correlation of all variables*/ proc corr data=taq.all_variables outp=pearson outs=spearman noprint; by endofweek; var weekly_srvj weekly_rvjp weekly_rvjn weekly_srvlj weekly_rvljp weekly_rvljn weekly_srvsj weekly_rvsjp weekly_rvsjn weekly_rvol weekly_rsk weekly_rkt beta logme beme mom rev ivol csk ckt max_logret min_logret log_illiq; run; data pearson; set pearson; by endofweek; retain id; if first.endofweek then id=0; id=id+1; if _name_='' then _name_=_type_; run; proc sort data=pearson; by id _name_ endofweek; run; data pearson2; set pearson; by id _name_ endofweek; _name_=id; run; proc summary data=pearson2; by id; var weekly_srvj weekly_rvjp weekly_rvjn weekly_srvlj weekly_rvljp weekly_rvljn weekly_srvsj weekly_rvsjp weekly_rvsjn weekly_rvol weekly_rsk weekly_rkt beta logme beme mom rev ivol csk ckt max_logret min_logret log_illiq; output out=pearson3 mean= std= / autoname; run; PROC EXPORT DATA=pearson3 outfile= "E:\correlation.xlsx " dbms=xlsx replace; sheet="Summary_corr1"; run; data spearman; set spearman; by endofweek; retain id; if first.endofweek then id=0; id=id+1; if _name_='' then _name_=_type_; run; proc sort data=spearman; by id _name_ endofweek; run; data spearman2; set spearman; by id _name_ endofweek; _name_=id; run; proc summary data=spearman2; by id; var weekly_srvj weekly_rvjp weekly_rvjn weekly_srvlj weekly_rvljp weekly_rvljn weekly_srvsj weekly_rvsjp weekly_rvsjn weekly_rvol weekly_rsk weekly_rkt beta logme beme mom rev ivol csk ckt max_logret min_logret log_illiq; output out=spearman3 mean= std= / autoname; run; PROC EXPORT DATA=spearman3 outfile= "E:\correlation.xlsx " dbms=xlsx replace; sheet="Summary_corr2"; run; /******************************************************/ /*Replicating results in table 3*/ /**sort and summary statistics of control variables***/ %let var_list =weekly_srvj weekly_rvjp weekly_rvjn weekly_srvlj weekly_rvljp weekly_rvljn weekly_srvsj weekly_rvsjp weekly_rvsjn weekly_rvol weekly_rsk weekly_rkt beta logme beme mom rev ivol csk ckt max_logret min_logret log_illiq; %let measure_list =srvj rvjp rvjn srvlj rvljp rvljn srvsj rvsjp rvsjn rvol rsk rkt; %macro control_sort(measure_list= , var_list=); %local i measure_name; %let i=1; %do %while (%scan(&measure_list, &i) ne ); %let measure_name = %scan(&measure_list, &i); *sorting variable; %let rankvar = weekly_&measure_name; *depdendent variable; *%let var = &var_list; *time variable and number of groups; %let timevar = endofweek; *%let n=5;*10 groups for example; options notes; proc sort data=taq.sample out=sample2; by &timevar; run; /***breakpoints****/ proc univariate data=sample2 noprint; var &rankvar; by endofweek; output out=break pctlpts= 20 40 60 80 pctlpre=dec; run; proc sql; create table port as select a.*, b.dec20, b.dec40,b.dec60,b.dec80 from sample2 as a left join break as b on a.endofweek=b.endofweek; quit; data port2; set port; If &rankvar^=. then do; if &rankvar<=dec20 then group=1; else if &rankvar>dec20 and &rankvar<=dec40 then group=2; else if &rankvar>dec40 and &rankvar<=dec60 then group=3; else if &rankvar>dec60 and &rankvar<=dec80 then group=4; else if &rankvar>dec80 then group=5; else group=.; end; drop dec:; run; proc sort data=port2; by &timevar group; run; /***************************/ proc means data=port2 noprint; by &timevar group; var &var_list; output out=ewmeanret mean=; run; /************************/ %let data=ewmeanret; %let rankvar2=group; proc sort data=&data; by &rankvar2 &timevar; run; proc transpose data=&data out=data; by &rankvar2 &timevar; var &var_list; run; proc sort data=data; by _name_ &rankvar2; run; proc sort data=data; by _name_ &rankvar2 &timevar; run; proc means data=data noprint; by _name_ &rankvar2; var col1; output out=mean_port(drop=endofweek) mean=; run; PROC EXPORT DATA=mean_port outfile= "E:\sort_control.xlsx " dbms=xlsx replace; sheet="&measure_name"; run; %let i = %eval(&i + 1); %end; %mend control_sort; %control_sort(measure_list=&measure_list, var_list=&var_list); /***********************************************************/ /*Replicating figure 1*/ /*Plot kernel density**/ options notes; data plot; set taq.weekly_rkt; run; proc sort; by endofweek; run; proc sgplot data=plot; density weekly_rkt/type=kernel ; xaxis label="RKT"; RUN; /*************************************************************/ /*Replicating figure 2*/ /**Plot time series percentage**/ proc sort data=taq.weekly_srvsj out=sample2; by endofweek; run; /***breakpoints****/ proc univariate data=sample2 noprint; var weekly_srvsj; by endofweek; output out=break pctlpts= 10 50 90 pctlpre=dec; run; data break; set break; count=1; run; proc sort data=break; by endofweek; run; proc expand data=break out=break2 method=none; *by permno; id endofweek; convert dec10 = ave_dec10 / transformout=( movsum 10 ); convert dec50 = ave_dec50 / transformout=( movsum 10 ); convert dec90 = ave_dec90 / transformout=( movsum 10 ); convert count=n/transformout=( movsum 10 ); quit; data break3; set break2; if n>=10 then do; meandec10=(ave_dec10/n); meandec50=(ave_dec50/n); meandec90=(ave_dec90/n); end; run; data break4; set break3; where meandec10^=. and meandec50^=. and meandec90^=.; run; proc sort data=break4; by endofweek; run; proc sgplot data =break4; series x=endofweek y=meandec10 /legendlabel='10th' name='lineA' lineattrs=(pattern=shortdash); series x=endofweek y=meandec50 /legendlabel='50th' name='lineB' lineattrs=(pattern=dashdotdot); series x=endofweek y=meandec90 /legendlabel='90th' name='lineC' lineattrs=(pattern=solid); format endofweek year4.; xaxis values=('01jan1993'd to '31dec2016'd by year); yaxis label="SRVSJ"; keylegend 'lineA' 'lineB' 'lineC'/ location=inside position=topright; RUN; /***************************************************************/ /*Replicating figure 3*/ /***Sharpe ratio and cumulative return***/ %let measure_list = rsj rsk srvj srvlj srvsj; %macro sharpe(measure_list=); %local i measure_name; %let i=1; %do %while (%scan(&measure_list, &i) ne ); %let measure_name = %scan(&measure_list, &i); *sorting variable; %let rankvar = weekly_&measure_name; *depdendent variable; %let var = leadret; *time variable and number of groups; %let timevar = endofweek; *%let n=5;*10 groups for example; options notes; proc sql; create table sample as select a.*, b.me, c.&rankvar from taq.leadret as a, taq.me as b, taq.weekly_&measure_name as c where a.permno=b.permno=c.permno and a.endofweek=b.endofweek=c.endofweek; quit; data sample2; set sample; leadret=leadret*10000; keep permno endofweek leadret &rankvar me; run; proc sort; by &timevar; run; /***breakpoints****/ proc univariate data=sample2 noprint; var &rankvar; by endofweek; output out=break pctlpts= 20 40 60 80 pctlpre=dec; run; proc sql; create table port as select a.*, b.dec20, b.dec40,b.dec60,b.dec80 from sample2 as a left join break as b on a.endofweek=b.endofweek; quit; data port2; set port; If &rankvar^=. then do; if &rankvar<=dec20 then group=1; else if &rankvar>dec20 and &rankvar<=dec40 then group=2; else if &rankvar>dec40 and &rankvar<=dec60 then group=3; else if &rankvar>dec60 and &rankvar<=dec80 then group=4; else if &rankvar>dec80 then group=5; else group=.; end; drop dec:; run; proc sort data=port2; by &timevar group; run; data port2; set port2; rename me= weight_port; run; /************************************************************************************/ proc means data=port2 noprint; by &timevar group; var &var; output out=ewmeanret mean=; run; data ewmeanret; set ewmeanret; weight='EW'; run; proc means data=port2 noprint; by &timevar group; var &var; weight weight_port; output out=vwmeanret mean=; run; data vwmeanret; set vwmeanret; weight='VW'; run; data meanret; set ewmeanret vwmeanret; run; data meanret; set meanret; if group=. then delete; run; /************************/ %let data=meanret; %let lag=3; %let byvar=weight; %let rankvar2=group; proc sort data=&data; by &byvar &rankvar2 &timevar; run; proc transpose data=&data out=data; by &byvar &rankvar2 &timevar; var &var; run; *Find L-H difference; proc sort data=&data out=sum; by &byvar &timevar &rankvar2; run; data sum_diff; set sum; by &byvar &timevar &rankvar2; if first.&timevar or last.&timevar; if first.&timevar then &rankvar2=1; if last.&timevar then &rankvar2=2; run; proc transpose data=sum_diff out=sum_diff2; by &byvar &timevar; var &var; id &rankvar2; run; data sum_diff2; set sum_diff2; &rankvar2=99; col1 = _1 - _2; drop _2 _1; run; data sum_diff2; set data sum_diff2; run; proc sort data=sum_diff2; by &byvar _name_ &rankvar2; run; proc sql; create table sum_diff3 as select a.*,(a.col1-b.rf*10000) as exret from sum_diff2 as a left join taq.rf as b on intck('week1.4',a.endofweek, b.endofweek)=1; quit; proc sort data=sum_diff3; by &byvar _name_ &rankvar2 &timevar; run; data sum_diff3; set sum_diff3; if &rankvar2=99 then exret=col1; run; proc sort data=sum_diff3; by &byvar _name_ &rankvar2 &timevar; run; /*Cumulative returns*/ data group99; set sum_diff3; by weight endofweek; where group=99; run; proc sort data=group99 by weight endofweek; run; data group99_cum; set group99; by weight; retain cum_sum; cum_sum+log((1+col1/10000)); if weight^=lag(weight) then cum_sum=log((1+col1/10000)); run; data group99_cum2_&measure_name; set group99_cum; by weight; cum_ret=exp(cum_sum); run; /**Sharpe Ratio*/ /* data sharpe; set sum_diff3; by &byvar _name_ &rankvar2 &timevar; where &rankvar2=99; run; proc means data=sharpe; by weight; var col1; output out=sharperatio mean=mean_ret std=error; run; data sharperatio2; set sharperatio; by weight; sharpe=mean_ret/error*sqrt(252/5); run; proc export data=sharperatio2 outfile= "E:\sharperatio.xlsx " dbms=xlsx replace; sheet="&measure_name"; run; */ %let i = %eval(&i + 1); %end; %mend sharpe; %sharpe(measure_list=&measure_list); proc sql; create table cum_ret as select a.*, b.cum_ret as cum_ret_rsj, c.cum_ret as cum_ret_srvlj, d.cum_ret as cum_ret_srvsj, e.cum_ret as cum_ret_rsk from group99_cum2_r_srvj as a, group99_cum2_rsj as b, group99_cum2_r_srvlj as c, group99_cum2_r_srvsj as d, group99_cum2_rsk as e where a.endofweek=b.endofweek=c.endofweek=d.endofweek=e.endofweek and a.weight=b.weight=c.weight=d.weight=e.weight; quit; proc sgplot data=cum_ret; where weight='EW'; format endofweek year.; series x=endofweek y=cum_ret/ legendlabel='SRVJ' name='lineA' lineattrs=(pattern=shortdash) lineattrs=(color=blue); series x=endofweek y=cum_ret_rsj/ legendlabel='RSJ' name='lineB' lineattrs=(pattern=dashdotdot) lineattrs=(color=red); series x=endofweek y=cum_ret_srvlj/ legendlabel='SRVLJ' name='lineC' lineattrs=(pattern=solid) lineattrs=(color=green); series x=endofweek y=cum_ret_srvsj/ legendlabel='SRVSJ' name='lineD' lineattrs=(pattern=shortdashdot) lineattrs=(color=purple); series x=endofweek y=cum_ret_rsk/ legendlabel='RSK' name='lineE' lineattrs=(pattern=longdashshortdash) lineattrs=(color=black); xaxis values=('01jan1993'd to '31dec2016'd by year); yaxis label="Portfolio Value"; keylegend 'lineA' 'lineB' 'lineC' 'lineD' 'lineE' / location=inside position=topright; RUN; proc sgplot data=cum_ret; where weight='VW'; format endofweek year.; series x=endofweek y=cum_ret/ legendlabel='SRVJ' name='lineA' lineattrs=(pattern=shortdash) lineattrs=(color=blue); series x=endofweek y=cum_ret_rsj/ legendlabel='RSJ' name='lineB' lineattrs=(pattern=dashdotdot) lineattrs=(color=red); series x=endofweek y=cum_ret_srvlj/ legendlabel='SRVLJ' name='lineC' lineattrs=(pattern=solid) lineattrs=(color=green); series x=endofweek y=cum_ret_srvsj/ legendlabel='SRVSJ' name='lineD' lineattrs=(pattern=shortdashdot) lineattrs=(color=purple); series x=endofweek y=cum_ret_rsk/ legendlabel='RSK' name='lineE' lineattrs=(pattern=longdashshortdash) lineattrs=(color=black); xaxis values=('01jan1993'd to '31dec2016'd by year); yaxis label="Portfolio Value"; keylegend 'lineA' 'lineB' 'lineC' 'lineD' 'lineE' / location=inside position=topright; RUN; /****************************************************************************************************/ /*Replicating results in table 13-15*/ /***Industry double sort --independent***/ options notes; proc sql; create table sample as select a.*, b.me, c.weekly_srvj from taq.leadret as a, taq.me as b, taq.weekly_srvj as c where a.permno=b.permno=c.permno and a.endofweek=b.endofweek=c.endofweek; quit; data sample; set sample; leadret=leadret*10000; run; proc sort data=sample; by endofweek; run; proc sql; create table sample2 as select a.*, b.FFI49_desc, b.FFI49, b.newdate from sample a left join taq.industry b on a.permno=b.permno and intck('month', b.newdate, a.endofweek) between 1 and 12; quit; proc sort data=sample2; by endofweek ffi49; run; /**value-weighted average of industry jump measures*/ proc summary data=sample2; where weekly_srvj^=. and ffi49^=.; by endofweek ffi49; var weekly_srvj; weight me; output out=vw_ave_ind mean=w_weekly_srvj; run; proc sql; create table stock_ind as select a.*, b.w_weekly_srvj from sample2 as a, vw_ave_ind as b where a.endofweek=b.endofweek and a.ffi49=b.ffi49; quit; proc sort data=stock_ind; by endofweek ffi49; run; /***breakpoints****/ proc univariate data=stock_ind noprint; where weekly_srvj^=. and ffi49^=.; var w_weekly_srvj; by endofweek; output out=break pctlpts= 20 40 60 80 pctlpre=dec; run; proc sql; create table port as select a.*, b.dec20, b.dec40,b.dec60,b.dec80 from stock_ind as a left join break as b on a.endofweek=b.endofweek; quit; data port2; set port; If weekly_srvj^=. and ffi49^=. then do; if w_weekly_srvj<=dec20 then group1=1; else if w_weekly_srvj>dec20 and w_weekly_srvj<=dec40 then group1=2; else if w_weekly_srvj>dec40 and w_weekly_srvj<=dec60 then group1=3; else if w_weekly_srvj>dec60 and w_weekly_srvj<=dec80 then group1=4; else if w_weekly_srvj>dec80 then group1=5; else group1=.; end; drop dec:; run; proc sort data=port2; by endofweek group1; run; data port2; set port2; rename me= weight_port; run; proc univariate data=stock_ind noprint; where weekly_srvj^=. and ffi49^=.; var weekly_srvj; by endofweek; output out=break2 pctlpts= 20 40 60 80 pctlpre=rank2var; run; proc sql; create table port3 as select a.*, b.rank2var20, b.rank2var40,b.rank2var60,b.rank2var80 from port2 as a left join break2 as b on a.endofweek=b.endofweek; quit; data port4; set port3; If weekly_srvj^=. and ffi49^=. then do; if weekly_srvj<=rank2var20 then group2=1; else if weekly_srvj>rank2var20 and weekly_srvj<=rank2var40 then group2=2; else if weekly_srvj>rank2var40 and weekly_srvj<=rank2var60 then group2=3; else if weekly_srvj>rank2var60 and weekly_srvj<=rank2var80 then group2=4; else if weekly_srvj>rank2var80 then group2=5; else group2=.; end; drop rank2var:; run; proc sort data=port4; by endofweek group1 group2; run; proc means data=port4 noprint; by endofweek group1 group2; var leadret; output out=ewmeanret mean=; run; data ewmeanret; set ewmeanret; weight='EW'; run; proc means data=port4 noprint; by endofweek group1 group2; var leadret; weight weight_port; output out=vwmeanret mean=; run; data vwmeanret; set vwmeanret; weight='VW'; run; data meanret; set ewmeanret vwmeanret; run; data meanret; set meanret; if group1=. or group2=. then delete; run; proc sort data=meanret; by weight group1 group2 endofweek; run; proc transpose data=meanret out=data; by weight group1 group2 endofweek; var leadret; run; proc sort data=data; by _name_ weight group1 group2 endofweek; run; proc sort data=meanret out=sum; by weight group1 endofweek group2;run; data sum_diff; set sum(where=(group2>-1)); by weight group1 endofweek group2; if first.endofweek or last.endofweek; if first.endofweek then group2=1; if last.endofweek then group2=2; run; proc transpose data=sum_diff out=sum_diff2; by weight group1 endofweek; var leadret; id group2; run; data sum_diff2; set sum_diff2; group2=99; col1 = _2 - _1; drop _2 _1; run; proc sql; create table sum_diff2_temp as select a.weight, a.group1, a.group2, a.endofweek, a._name_, a.col1 from sum_diff2 as a; quit; data sum_diff2; set data sum_diff2_temp;run; proc sort data=sum_diff2; by _name_ weight group2 endofweek group1; run; data sum_diff3; set sum_diff2(where=(group1>-1)); by _name_ weight group2 endofweek group1; if first.endofweek or last.endofweek; if first.endofweek then group1=1; if last.endofweek then group1=2; run; proc transpose data=sum_diff3 out=sum_diff3; by _name_ weight group2 endofweek; var col1; id group1; run; data sum_diff3; set sum_diff3; group1=100; col1 = _2 - _1; drop _2 _1; run; proc sql; create table sum_diff33 as select a.weight, a.group1, a.group2, a.endofweek, a._name_, a.col1 from sum_diff3 as a; quit; data sum_diff3; set sum_diff33 sum_diff2;run; proc sort data=sum_diff3; by _name_ weight group1 group2 endofweek;run; proc sql; create table sum_diff4 as select a.*,(a.col1-b.rf*10000) as exret, b.mktrf*10000 as mkt_rf, b.smb*10000 as smb,b.hml*10000 as hml, b.umd*10000 as umd from sum_diff3 as a left join taq.ffc4 as b on intck('week1.4',a.endofweek, b.endofweek)=1; quit; data sum_diff4; set sum_diff4; if group1=100 or group2=99 then exret=col1; run; proc sort data=sum_diff4; by _name_ weight group1 group2 endofweek; run; data sum_hl; set sum_diff4; where group1=100 or group2=99; run; data sum_diff5; set sum_diff4; where group1^=100 and group2^=99; run; proc sort data=sum_hl; by _name_ weight group1 group2 endofweek; run; proc sort data=sum_diff5; by _name_ weight group1 group2 endofweek; run; data sum_diff100; set sum_hl; where group1=100; run; data sum_diff99; set sum_hl; where group2=99; run; proc sort data=sum_diff100; by _name_ weight group1 group2 endofweek;run; proc sort data=sum_diff99; by _name_ weight group1 group2 endofweek;run; ***average return; options nonotes; proc model data=sum_diff5; by weight _name_ group1 group2; parms a; exogenous col1 ; instruments / intonly; col1=a; fit col1 / gmm kernel=(bart, %eval(3+1), 0); ods output parameterestimates=param0 fitstatistics=fitresult OutputStatistics=residual; quit; data param0; set param0; type='Average ret';run; proc model data=sum_diff5; by weight _name_ group1 group2; parms a b1 b2 b3 b4; instruments mkt_rf smb hml umd; exret =a+b1* mkt_rf+b2*smb+b3*hml+b4*umd; fit exret / gmm kernel=(bart, %eval(3+1), 0); ods output parameterestimates=param3 fitstatistics=fitresult OutputStatistics=residual; quit; data param3; set param3; type='Carhart4 Alpha'; if parameter='a'; run; proc sort data=sum_diff100; by weight _name_ group1 group2 endofweek; run; proc model data=sum_diff100; by weight _name_ group1 group2; parms a; exogenous col1 ; instruments / intonly; col1=a; fit col1 / gmm kernel=(bart, %eval(3+1), 0); ods output parameterestimates=param0_hl100 fitstatistics=fitresult OutputStatistics=residual; quit; data param0_hl100; set param0_hl100; type='Average ret';run; proc model data=sum_diff100; by weight _name_ group1 group2; parms a b1 b2 b3 b4; instruments mkt_rf smb hml umd; exret =a+b1* mkt_rf+b2*smb+b3*hml+b4*umd; fit exret / gmm kernel=(bart, %eval(3+1), 0); ods output parameterestimates=param3_hl100 fitstatistics=fitresult OutputStatistics=residual; quit; data param3_hl100; set param3_hl100; type='Carhart4 Alpha'; if parameter='a'; run; proc sort data=sum_diff100; by weight _name_ group1 endofweek group2; run; proc summary data=sum_diff100; where group1^=. and group2^=99; by weight _name_ group1 endofweek; var col1; output out=mean_hl100 mean=mean_hl100; run; proc sort data=mean_hl100; by weight _name_ group1 endofweek; run; proc model data=mean_hl100; by weight _name_ group1; parms a; exogenous mean_hl100 ; instruments / intonly; mean_hl100=a; fit mean_hl100 / gmm kernel=(bart, %eval(3+1), 0); ods output parameterestimates=param0_mean100 fitstatistics=fitresult OutputStatistics=residual; quit; data param0_mean100; set param0_mean100; type='Average ret';run; proc sql; create table mean_hl100_ff4 as select a.*, b.mktrf*10000 as mkt_rf, b.smb*10000 as smb,b.hml*10000 as hml, b.umd*10000 as umd from mean_hl100 as a left join taq.ffc4 as b on intck('week1.4',a.endofweek, b.endofweek)=1; quit; proc sort data=mean_hl100_ff4; by weight _name_ group1; run; proc model data=mean_hl100_ff4; by weight _name_ group1 ; parms a b1 b2 b3 b4; instruments mkt_rf smb hml umd; mean_hl100 =a+b1* mkt_rf+b2*smb+b3*hml+b4*umd; fit mean_hl100 / gmm kernel=(bart, %eval(3+1), 0); ods output parameterestimates=param3_mean_hl100 fitstatistics=fitresult OutputStatistics=residual; quit; data param3_mean_hl100; set param3_mean_hl100; type='Carhart4 Alpha'; if parameter='a'; run; proc sql; create table mean_hl99_ff4 as select a.*, b.mktrf*10000 as mkt_rf, b.smb*10000 as smb,b.hml*10000 as hml, b.umd*10000 as umd from mean_hl99 as a left join taq.ffc4 as b on intck('week1.4',a.endofweek, b.endofweek)=1; quit; proc sort data=mean_hl99_ff4; by weight _name_ group2; run; proc model data=mean_hl99_ff4; by weight _name_ group2 ; parms a b1 b2 b3 b4; instruments mkt_rf smb hml umd; mean_hl99 =a+b1* mkt_rf+b2*smb+b3*hml+b4*umd; fit mean_hl99 / gmm kernel=(bart, %eval(3+1), 0); ods output parameterestimates=param3_mean_hl99 fitstatistics=fitresult OutputStatistics=residual; quit; data param3_mean_hl99; set param3_mean_hl99; type='Carhart4 Alpha'; if parameter='a'; run; proc sort data=sum_diff99; by weight _name_ group1 group2 endofweek;run; proc model data=sum_diff99; by weight _name_ group1 group2; parms a; exogenous col1 ; instruments / intonly; col1=a; fit col1 / gmm kernel=(bart, %eval(3+1), 0); ods output parameterestimates=param0_hl99 fitstatistics=fitresult OutputStatistics=residual; quit; data param0_hl99; set param0_hl99; type='Average ret';run; proc model data=sum_diff99; by weight _name_ group1 group2; parms a b1 b2 b3 b4; instruments mkt_rf smb hml umd; exret =a+b1* mkt_rf+b2*smb+b3*hml+b4*umd; fit exret / gmm kernel=(bart, %eval(3+1), 0); ods output parameterestimates=param3_hl99 fitstatistics=fitresult OutputStatistics=residual; quit; data param3_hl99; set param3_hl99; type='Carhart4 Alpha'; if parameter='a'; run; proc sort data=sum_diff99; by weight _name_ group2 endofweek group1;run; proc summary data=sum_diff99; where group1^=100 and group2^=.; by weight _name_ group2 endofweek; var col1; output out=mean_hl99 mean=mean_hl99; run; proc sort data=mean_hl99; by weight _name_ group2 endofweek; run; proc model data=mean_hl99; by weight _name_ group2; parms a; exogenous mean_hl99 ; instruments / intonly; mean_hl99=a; fit mean_hl99 / gmm kernel=(bart, %eval(3+1), 0); ods output parameterestimates=param0_mean99 fitstatistics=fitresult OutputStatistics=residual; quit; data param0_mean99; set param0_mean99; type='Average ret';run; /***************************************************************/ /*Replicating figure 4*/ OPTION NOTES; /**average fraction**/ proc sort data=port4; by group1 group2 endofweek; run; proc summary data=port4; where group1^=. and group2^=.; by group1 group2 endofweek; var leadret; output out=N_25 N=N_25; run; PROC SORT DATA=N_25; by endofweek; run; proc summary data=N_25; by endofweek; var N_25; output out=total_25 sum=Total_25; run; proc sql; create table frac_25 as select a.*, b.total_25 from N_25 as a, total_25 as b where a.endofweek=b.endofweek; quit; data frac2_25; set frac_25; frac=N_25/total_25; run; proc sort data=frac2_25; by group1 group2 endofweek; run; proc summary data=frac2_25; by group1 group2; var frac; output ouot=frac3_25 mean=mean_frac_25; run; /*Fraction of market value*/ proc sort data=port4; by group1 group2 endofweek; run; proc summary data=port4; where group1^=. and group2^=.; by group1 group2 endofweek; var weight_port; output out=me_25 sum=sum_me; run; PROC SORT DATA=me_25; by endofweek; run; proc summary data=me_25; by endofweek; var sum_me; output out=total_me_25 sum=Total_me_25; run; proc sql; create table frac_me_25 as select a.*, b.total_me_25 from me_25 as a, total_me_25 as b where a.endofweek=b.endofweek; quit; data frac2_me_25; set frac_me_25; frac=sum_me/total_me_25; run; proc sort data=frac2_me_25; by group1 group2 endofweek; run; proc summary data=frac2_me_25; by group1 group2; var frac; output out=frac3_me_25 mean=mean_frac_me_25; run; PROC EXPORT DATA= frac3_25 outfile= "E:\frac25.xlsx " dbms=xlsx replace; sheet="frac25"; run; PROC EXPORT DATA= frac3_me_25 outfile= "E:\frac25.xlsx " dbms=xlsx replace; sheet="frac_me_25"; run; /***************************************************************/ /*Replicating results in table 16-17*/ /*Fama-MacBeth Regressions*/ data reg; set taq.sample; run; %let timevar = endofweek; options notes; proc sort data=reg; by &timevar; run; %let y=leadret ; proc reg data=reg outest=fm noprint; by endofweek; model &y = weekly_rvljp weekly_rvljn /adjrsq; model &y = weekly_rvsjp weekly_rvsjn /adjrsq; model &y = weekly_rvjp weekly_rvjn /adjrsq; model &y = weekly_rvljp weekly_rvljn weekly_rvsjp weekly_rvsjn /adjrsq; model &y = weekly_srvj /adjrsq; model &y = weekly_srvlj /adjrsq; model &y = weekly_srvsj /adjrsq; model &y = weekly_srvlj weekly_srvsj /adjrsq; model &y = weekly_rsj /adjrsq; model &y = weekly_rvljp weekly_rvljn weekly_rvol weekly_rsk weekly_rkt /adjrsq; model &y = weekly_rvsjp weekly_rvsjn weekly_rvol weekly_rsk weekly_rkt /adjrsq; model &y = weekly_rvjp weekly_rvjn weekly_rvol weekly_rsk weekly_rkt /adjrsq; model &y = weekly_rvljp weekly_rvljn weekly_rvsjp weekly_rvsjn weekly_rvol weekly_rsk weekly_rkt /adjrsq; model &y = weekly_srvj weekly_rvol weekly_rsk weekly_rkt /adjrsq; model &y = weekly_srvlj weekly_rvol weekly_rsk weekly_rkt /adjrsq; model &y = weekly_srvsj weekly_rvol weekly_rsk weekly_rkt /adjrsq; model &y = weekly_srvlj weekly_srvsj weekly_rvol weekly_rsk weekly_rkt /adjrsq; model &y = weekly_rsj weekly_rvol weekly_rsk weekly_rkt /adjrsq; quit; proc sort data=fm; by _model_ endofweek; run; data fm2; set fm; drop &y _TYPE_ _DEPVAR_ _RMSE_ _IN_ _P_ _EDF_; rename _model_=model; run; proc transpose data=fm2 out=fm3 name=name prefix=coef; by model endofweek; run; data fm3; set fm3; retain code; by model endofweek; code=code+1; if first.endofweek then code=1; run; proc sort data=fm3; by model code name; run; %let lag=3;*lag=6; proc model data=fm3; by model code name; parms a; exogenous coef1 ; instruments / intonly; coef1 =a; fit coef1 / gmm kernel=(bart, %eval(&lag+1), 0); ods output parameterestimates=param fitstatistics=fitresult OutputStatistics=residual; quit; PROC EXPORT DATA=param outfile= "E:\fm_nocontrol.xlsx " dbms=xlsx replace; sheet="param"; run; proc reg data=reg outest=fmc noprint; by endofweek; model &y = weekly_rvljp weekly_rvljn beta logme beme mom rev ivol csk ckt max_logret min_logret log_illiq/adjrsq; model &y = weekly_rvsjp weekly_rvsjn beta logme beme mom rev ivol csk ckt max_logret min_logret log_illiq/adjrsq; model &y = weekly_rvjp weekly_rvjn beta logme beme mom rev ivol csk ckt max_logret min_logret log_illiq/adjrsq; model &y = weekly_rvljp weekly_rvljn weekly_rvsjp weekly_rvsjn beta logme beme mom rev ivol csk ckt max_logret min_logret log_illiq /adjrsq; model &y = weekly_srvj beta logme beme mom rev ivol csk ckt max_logret min_logret log_illiq/adjrsq; model &y = weekly_srvlj beta logme beme mom rev ivol csk ckt max_logret min_logret log_illiq/adjrsq; model &y = weekly_srvsj beta logme beme mom rev ivol csk ckt max_logret min_logret log_illiq/adjrsq; model &y = weekly_srvlj weekly_srvsj beta logme beme mom rev ivol csk ckt max_logret min_logret log_illiq/adjrsq; model &y = weekly_rsj beta logme beme mom rev ivol csk ckt max_logret min_logret log_illiq/adjrsq; model &y = weekly_rvljp weekly_rvljn weekly_rvol weekly_rsk weekly_rkt beta logme beme mom rev ivol csk ckt max_logret min_logret log_illiq/adjrsq; model &y = weekly_rvsjp weekly_rvsjn weekly_rvol weekly_rsk weekly_rkt beta logme beme mom rev ivol csk ckt max_logret min_logret log_illiq/adjrsq; model &y = weekly_rvjp weekly_rvjn weekly_rvol weekly_rsk weekly_rkt beta logme beme mom rev ivol csk ckt max_logret min_logret log_illiq/adjrsq; model &y = weekly_rvljp weekly_rvljn weekly_rvsjp weekly_rvsjn weekly_rvol weekly_rsk weekly_rkt beta logme beme mom rev ivol csk ckt max_logret min_logret log_illiq/adjrsq; model &y = weekly_srvj weekly_rvol weekly_rsk weekly_rkt beta logme beme mom rev ivol csk ckt max_logret min_logret log_illiq/adjrsq; model &y = weekly_srvlj weekly_rvol weekly_rsk weekly_rkt beta logme beme mom rev ivol csk ckt max_logret min_logret log_illiq/adjrsq; model &y = weekly_srvsj weekly_rvol weekly_rsk weekly_rkt beta logme beme mom rev ivol csk ckt max_logret min_logret log_illiq/adjrsq; model &y = weekly_srvlj weekly_srvsj weekly_rvol weekly_rsk weekly_rkt beta logme beme mom rev ivol csk ckt max_logret min_logret log_illiq/adjrsq; model &y = weekly_rsj weekly_rvol weekly_rsk weekly_rkt beta logme beme mom rev ivol csk ckt max_logret min_logret log_illiq/adjrsq; quit; proc sort data=fmc; by _model_ endofweek; run; data fmc2; set fmc; drop &y _TYPE_ _DEPVAR_ _RMSE_ _IN_ _P_ _EDF_; rename _model_=model; run; proc transpose data=fmc2 out=fmc3 name=name prefix=coef; by model endofweek; run; data fmc3; set fmc3; retain code; by model endofweek; code=code+1; if first.endofweek then code=1; run; proc sort data=fmc3; by model code name; run; %let lag=3;*lag=6; proc model data=fmc3; by model code name; parms a; exogenous coef1 ; instruments / intonly; coef1 =a; fit coef1 / gmm kernel=(bart, %eval(&lag+1), 0); ods output parameterestimates=param2 fitstatistics=fitresult OutputStatistics=residual; quit; PROC EXPORT DATA=param2 outfile= "E:\fm_control.xlsx " dbms=xlsx replace; sheet="param2"; run;